%%%%%%%% Locate the group used to calculate the marginal effect: find the
%%%%%%%% group whose X (covariate means) is most similar to the whole sample.


G=532; %% # of groups

% Pass 1: read every group, stack the covariates (X_all) and outcomes
% (Y_all), and compute the pooled column means / standard deviations.
%
% Per-group pieces are collected in cell arrays and concatenated once after
% the loop; appending to X_all/Y_all inside the loop would recopy all the
% data accumulated so far on every iteration.
N=0;
X_parts=cell(G,1);
Y_parts=cell(G,1);
for g=1:G

    %% Load X and Y %%%
    % fullfile inserts the platform's file separator, so the relative
    % paths resolve on Windows as before and on other systems as well.
    gs=int2str(g);
    d1=load(fullfile('part1',['data_part1_' gs '.txt']));
    d2=load(fullfile('part2',['data_part2_' gs '.txt']));
    d3=load(fullfile('part3',['data_part3_' gs '.txt']));
    d4=load(fullfile('part4',['data_part4_' gs '.txt']));

    % Columns 1-4 of part 4 are covariates; column 5 is the outcome.
    X=[d1 d2 d3 d4(:,1:4)];
    % Append the scaled square of the first covariate as the last column.
    X_parts{g}=[X X(:,1).^2/10];
    Y_parts{g}=d4(:,5);

    n=size(d4,1); %% n is the # of observations of group g
    N=N+n;
end

X_all=vertcat(X_parts{:});
Y_all=vertcat(Y_parts{:});
clear X_parts Y_parts X gs d1 d2 d3 d4

% Explicit dimension arguments: always reduce over rows (observations),
% even if the stacked matrix happens to contain a single row.
amean=mean(X_all,1)';
asd=std(X_all,0,1)';

% Group by group: compute each group's covariate means and standard
% deviations, and the squared Euclidean distance between the group means
% and the pooled means (amean). Results are stored in diff_f and dat.
g=G;
grp=0;

N2=0;
diff_f=zeros(g,1);
% Preallocate the 1-by-g struct array. This also discards any stale
% elements left in the workspace by an earlier run of the script.
dat=struct('yt',cell(1,g),'xg',[],'sd_Xg',[]);
for i=1:g

    %%% Load X and Y %%%
    % Only parts 1-4 enter X and Y. Parts 5-36 (school dummies) used to be
    % loaded here as well but were never referenced afterwards, so those
    % 32 file reads per group have been dropped.
    gs=int2str(i);
    d1=load(fullfile('part1',['data_part1_' gs '.txt']));
    d2=load(fullfile('part2',['data_part2_' gs '.txt']));
    d3=load(fullfile('part3',['data_part3_' gs '.txt']));
    d4=load(fullfile('part4',['data_part4_' gs '.txt']));

    % Same design matrix as in the pooled pass: parts 1-3, the first four
    % columns of part 4, and the scaled square of the first covariate.
    Xx=[d1 d2 d3 d4(:,1:4)];
    X=[Xx Xx(:,1).^2/10];

    % Reduce over rows explicitly. Without the dimension argument a group
    % with a single observation (a row vector) would collapse to one
    % scalar and silently produce a wrong distance.
    Xg=mean(X,1)';
    sd_Xg=std(X,0,1)';

    % Squared Euclidean distance between pooled and group means. The
    % temporary is deliberately not called "diff", which would shadow the
    % builtin diff function for the rest of the session.
    dev=amean-Xg;
    diff_f(i)=dev'*dev;

    % Outcome is column 5 of part 4, rescaled as 2*y-1
    % (presumably maps a 0/1 outcome to -1/+1 -- confirm against the data).
    Y=2*d4(:,5)-1;

    mr=size(Y,1); %% mr is the # of observations of group i
    N2=N2+mr;

    grp=grp+1;
    dat(grp).yt=Y;
    dat(grp).xg=Xg;
    dat(grp).sd_Xg=sd_Xg;

end

  % Report the group whose covariate means are closest to the pooled means.
  N2                               % total # of observations over all groups
  size(diff_f)
  % min with two outputs returns a single index (the first one on ties).
  % find(diff_f==min(diff_f)) could return several rows, in which case
  % dat(r).yt expands to a comma-separated list and the lines below break.
  [dmin,r]=min(diff_f);
  dmin                             % the minimum value
  r                                % row (group index) of the minimum
  c=1                              % column; diff_f is a column vector
  size(dat(r).yt)                  % this group size
  [amean asd dat(r).xg dat(r).sd_Xg]  % mean/sd of the sample and of the group
  
   